Comparison of top significant genes detected by SpatialDE (from Stephanie’s script sce_spatialDE.Rmd) vs. our previous set of highly variable genes (HVGs).
Note that HVGs were previously calculated from all samples combined.
SpatialDE genes were calculated from sample 151673 only, with subsampling to 1500 spots due to slow runtime for SpatialDE (SpatialDE does not scale well with number of spots).
suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(readr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(scales))
Load original object containing HVGs, which were calculated from all samples combined.
# load scran output file
load("../../data/Human_DLPFC_Visium_processedData_sce_scran.Rdata")
sce
## class: SingleCellExperiment
## dim: 33538 47681
## metadata(1): image
## assays(2): counts logcounts
## rownames(33538): ENSG00000243485 ENSG00000237613 ... ENSG00000277475
## ENSG00000268674
## rowData names(5): gene_id gene_version gene_name gene_source
## gene_biotype
## colnames(47681): AAACAACGAATAGTTC-1 AAACAAGTATCTCCCA-1 ...
## TTGTTTCCATACAACT-1 TTGTTTGTGTAAATTC-1
## colData names(19): barcode sample_name ... key cell_count
## reducedDimNames(6): PCA TSNE_perplexity50 ... TSNE_perplexity80
## UMAP_neighbors15
## spikeNames(0):
## altExpNames(0):
# vector of top HVGs
head(top.hvgs)
## [1] "ENSG00000110484" "ENSG00000123560" "ENSG00000197971" "ENSG00000131095"
## [5] "ENSG00000124935" "ENSG00000211592"
# load spreadsheet containing SpatialDE results
spatialDE_results <- read_csv("../../data/Human_DLPFC_Visium_processedData_sce_scran_spatialDE_results.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## FSV = col_double(),
## M = col_double(),
## g = col_character(),
## l = col_double(),
## max_delta = col_double(),
## max_ll = col_double(),
## max_mu_hat = col_double(),
## max_s2_t_hat = col_double(),
## model = col_character(),
## n = col_double(),
## s2_FSV = col_double(),
## s2_logdelta = col_double(),
## time = col_double(),
## BIC = col_double(),
## max_ll_null = col_double(),
## LLR = col_double(),
## pval = col_double(),
## qval = col_double()
## )
Compare list of HVGs vs. list of significant genes from SpatialDE.
# select significant genes from SpatialDE
spatialDE_sig <- filter(spatialDE_results, qval < 0.05)
spatialDE_sig_genes <- spatialDE_sig$g
head(spatialDE_sig_genes)
## [1] "ENSG00000158286" "ENSG00000175206" "ENSG00000163909" "ENSG00000173218"
## [5] "ENSG00000272824" "ENSG00000256029"
length(spatialDE_sig_genes)
## [1] 1924
# compare HVGs vs. significant genes from SpatialDE
head(top.hvgs)
## [1] "ENSG00000110484" "ENSG00000123560" "ENSG00000197971" "ENSG00000131095"
## [5] "ENSG00000124935" "ENSG00000211592"
head(spatialDE_sig_genes)
## [1] "ENSG00000158286" "ENSG00000175206" "ENSG00000163909" "ENSG00000173218"
## [5] "ENSG00000272824" "ENSG00000256029"
length(top.hvgs)
## [1] 1942
length(spatialDE_sig_genes)
## [1] 1924
sum(spatialDE_sig_genes %in% top.hvgs)
## [1] 742
sum(top.hvgs %in% spatialDE_sig_genes)
## [1] 742
Histogram of p-values. Note that a large number of genes have q-values exactly equal to zero.
hist(spatialDE_sig$qval)
# number of genes with q-value equal to 0
sum(spatialDE_sig$qval == 0)
## [1] 387
Create some plots showing expression (UMI counts) of the top significant genes from Spatial DE, for sample 151673.
Note: since there are 387 genes with q-values exactly equal to 0, we plot a random set of 10 of these.
# select spots from sample 151673 only
ix_151673 <- colData(sce)$sample_name == 151673
table(ix_151673)
## ix_151673
## FALSE TRUE
## 44042 3639
sce_151673 <- sce[, ix_151673]
sce_151673
## class: SingleCellExperiment
## dim: 33538 3639
## metadata(1): image
## assays(2): counts logcounts
## rownames(33538): ENSG00000243485 ENSG00000237613 ... ENSG00000277475
## ENSG00000268674
## rowData names(5): gene_id gene_version gene_name gene_source
## gene_biotype
## colnames(3639): AAACAAGTATCTCCCA-1 AAACAATCTACTAGCA-1 ...
## TTGTTTGTATTACACG-1 TTGTTTGTGTAAATTC-1
## colData names(19): barcode sample_name ... key cell_count
## reducedDimNames(6): PCA TSNE_perplexity50 ... TSNE_perplexity80
## UMAP_neighbors15
## spikeNames(0):
## altExpNames(0):
# extract x-y coordinates of spots (note: y coordinate is reversed)
xy_coords <- data.frame(
x_coord = colData(sce_151673)[, c("imagecol")],
y_coord = -colData(sce_151673)[, c("imagerow")]
)
# select top significant genes from SpatialDE
spatialDE_sig <- mutate(spatialDE_sig, rank = rank(qval, ties.method = "first"))
n_top <- 10
spatialDE_top <- filter(spatialDE_sig, rank <= n_top)
spatialDE_top
## # A tibble: 10 x 20
## X1 FSV M g l max_delta max_ll max_mu_hat max_s2_t_hat model
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 8326 0.989 4 ENSG… 5.83 0.0113 3146. 0.504 0.0922 SE
## 2 8352 1.000 4 ENSG… 5.83 0.0000454 4792. 0.503 0.0912 SE
## 3 8364 1.000 4 ENSG… 5.83 0.0000454 4779. 0.499 0.0897 SE
## 4 8365 1.000 4 ENSG… 5.83 0.0000454 4033. 0.502 0.0911 SE
## 5 8368 1.000 4 ENSG… 5.83 0.0000454 4092. 0.497 0.0892 SE
## 6 8373 1.000 4 ENSG… 5.83 0.0000454 3766. 0.509 0.0937 SE
## 7 8389 1.000 4 ENSG… 5.83 0.0000454 4055. 0.499 0.0901 SE
## 8 8394 1.000 4 ENSG… 5.83 0.0000454 4743. 0.499 0.0898 SE
## 9 8408 1.000 4 ENSG… 5.83 0.0000454 4087. 0.504 0.0918 SE
## 10 8412 1.000 4 ENSG… 5.83 0.0000454 4051. 0.497 0.0892 SE
## # … with 10 more variables: n <dbl>, s2_FSV <dbl>, s2_logdelta <dbl>,
## # time <dbl>, BIC <dbl>, max_ll_null <dbl>, LLR <dbl>, pval <dbl>,
## # qval <dbl>, rank <int>
spatialDE_top_genes <- spatialDE_top$g
spatialDE_top_genes
## [1] "ENSG00000122420" "ENSG00000272721" "ENSG00000182952" "ENSG00000271754"
## [5] "ENSG00000229921" "ENSG00000228434" "ENSG00000280195" "ENSG00000253196"
## [9] "ENSG00000285254" "ENSG00000270087"
# get expression levels (UMI counts) for top significant genes from SpatialDE
exprs_151673_spatialDE_top <- counts(sce_151673)[spatialDE_top_genes, ]
dim(exprs_151673_spatialDE_top)
## [1] 10 3639
# match to spots and set up data frame for ggplot2
stopifnot(all(colData(sce_151673)$barcode == rownames(t(exprs_151673_spatialDE_top))))
stopifnot(length(colData(sce_151673)$barcode) == length(rownames(t(exprs_151673_spatialDE_top))))
d_plot <- cbind(
barcode = colData(sce_151673)$barcode,
xy_coords,
as.data.frame(as.matrix(t(exprs_151673_spatialDE_top)))
)
head(d_plot)
## barcode x_coord y_coord ENSG00000122420
## AAACAAGTATCTCCCA-1 AAACAAGTATCTCCCA-1 440.6391 -381.0981 0
## AAACAATCTACTAGCA-1 AAACAATCTACTAGCA-1 259.6310 -126.3276 0
## AAACACCAATAACTGC-1 AAACACCAATAACTGC-1 183.0783 -427.7678 0
## AAACAGAGCGACTCCT-1 AAACAGAGCGACTCCT-1 417.2367 -186.8137 0
## AAACAGCTTTCAGAAG-1 AAACAGCTTTCAGAAG-1 152.7003 -341.2691 0
## AAACAGGGTCTATATT-1 AAACAGGGTCTATATT-1 164.9415 -362.9163 0
## ENSG00000272721 ENSG00000182952 ENSG00000271754
## AAACAAGTATCTCCCA-1 0 0 0
## AAACAATCTACTAGCA-1 0 0 0
## AAACACCAATAACTGC-1 0 0 0
## AAACAGAGCGACTCCT-1 0 0 0
## AAACAGCTTTCAGAAG-1 0 0 0
## AAACAGGGTCTATATT-1 0 0 0
## ENSG00000229921 ENSG00000228434 ENSG00000280195
## AAACAAGTATCTCCCA-1 0 0 0
## AAACAATCTACTAGCA-1 0 0 0
## AAACACCAATAACTGC-1 0 0 0
## AAACAGAGCGACTCCT-1 0 0 0
## AAACAGCTTTCAGAAG-1 0 0 0
## AAACAGGGTCTATATT-1 0 0 0
## ENSG00000253196 ENSG00000285254 ENSG00000270087
## AAACAAGTATCTCCCA-1 0 0 0
## AAACAATCTACTAGCA-1 0 0 0
## AAACACCAATAACTGC-1 0 0 0
## AAACAGAGCGACTCCT-1 0 0 0
## AAACAGCTTTCAGAAG-1 0 0 0
## AAACAGGGTCTATATT-1 0 0 0
d_plot <- melt(
d_plot,
id.vars = c("barcode", "x_coord", "y_coord"),
variable.name = "gene_id",
value.name = "UMIs"
)
head(d_plot)
## barcode x_coord y_coord gene_id UMIs
## 1 AAACAAGTATCTCCCA-1 440.6391 -381.0981 ENSG00000122420 0
## 2 AAACAATCTACTAGCA-1 259.6310 -126.3276 ENSG00000122420 0
## 3 AAACACCAATAACTGC-1 183.0783 -427.7678 ENSG00000122420 0
## 4 AAACAGAGCGACTCCT-1 417.2367 -186.8137 ENSG00000122420 0
## 5 AAACAGCTTTCAGAAG-1 152.7003 -341.2691 ENSG00000122420 0
## 6 AAACAGGGTCTATATT-1 164.9415 -362.9163 ENSG00000122420 0
# generate plots
ggplot(d_plot, aes(x = x_coord, y = y_coord, color = UMIs)) +
facet_wrap(~ gene_id) +
geom_point(size = 0.8, alpha = 1) +
scale_color_gradient(low = "gray90", high = "red") +
coord_fixed() +
theme_bw() +
ggtitle("UMIs of top SpatialDE genes for sample 151673")
filename <- "../plots/spatialDE/spatialDE_top_genes_151673.png"
ggsave(filename, width = 15, height = 13)
Compare to plots showing expression (UMI counts) of some known marker genes.
Currently using marker genes SNAP25, MOBP, and PCP4 (from Kristen Maynard’s slide presentation).
Could also compare with expression of some of the top HVGs (however would need q-values for the HVGs to do this).
# choose marker genes and get expression levels (UMI counts)
marker_genes <- c("SNAP25", "MOBP", "PCP4")
ix_marker_genes <- match(marker_genes, rowData(sce_151673)$gene_name)
exprs_marker_genes <- counts(sce_151673)[ix_marker_genes, ]
# use original gene names
rownames(exprs_marker_genes) <- marker_genes
dim(exprs_marker_genes)
## [1] 3 3639
# match to spots and set up data frame for ggplot2
stopifnot(all(colData(sce_151673)$barcode == rownames(t(exprs_marker_genes))))
stopifnot(length(colData(sce_151673)$barcode) == length(rownames(t(exprs_marker_genes))))
d_plot <- cbind(
barcode = colData(sce_151673)$barcode,
xy_coords,
as.data.frame(as.matrix(t(exprs_marker_genes)))
)
head(d_plot)
## barcode x_coord y_coord SNAP25 MOBP PCP4
## AAACAAGTATCTCCCA-1 AAACAAGTATCTCCCA-1 440.6391 -381.0981 16 1 0
## AAACAATCTACTAGCA-1 AAACAATCTACTAGCA-1 259.6310 -126.3276 8 0 1
## AAACACCAATAACTGC-1 AAACACCAATAACTGC-1 183.0783 -427.7678 4 7 0
## AAACAGAGCGACTCCT-1 AAACAGAGCGACTCCT-1 417.2367 -186.8137 17 2 1
## AAACAGCTTTCAGAAG-1 AAACAGCTTTCAGAAG-1 152.7003 -341.2691 10 1 0
## AAACAGGGTCTATATT-1 AAACAGGGTCTATATT-1 164.9415 -362.9163 14 3 2
d_plot <- melt(
d_plot,
id.vars = c("barcode", "x_coord", "y_coord"),
variable.name = "gene_id",
value.name = "UMIs"
)
head(d_plot)
## barcode x_coord y_coord gene_id UMIs
## 1 AAACAAGTATCTCCCA-1 440.6391 -381.0981 SNAP25 16
## 2 AAACAATCTACTAGCA-1 259.6310 -126.3276 SNAP25 8
## 3 AAACACCAATAACTGC-1 183.0783 -427.7678 SNAP25 4
## 4 AAACAGAGCGACTCCT-1 417.2367 -186.8137 SNAP25 17
## 5 AAACAGCTTTCAGAAG-1 152.7003 -341.2691 SNAP25 10
## 6 AAACAGGGTCTATATT-1 164.9415 -362.9163 SNAP25 14
# generate plots
ggplot(d_plot, aes(x = x_coord, y = y_coord, color = UMIs)) +
facet_wrap(~ gene_id) +
geom_point(size = 0.8, alpha = 1) +
scale_color_gradientn(colors = c("gray90", "red", "blue"), values = rescale(c(0, 20, 85))) +
coord_fixed() +
theme_bw() +
ggtitle("UMIs of known marker genes for sample 151673")
filename <- "../plots/spatialDE/marker_genes_151673.png"
ggsave(filename, width = 11, height = 5)